準備資料

Part1:基本概況

1-1.訂單數量

## [1] 99441     2

1-2.賣家數量及地理分布

## [1] 2962

1-3.買家數量及地理分布

## [1] 92458

## [1] 96096     2
## 
##     1     2     3     4     5     6     7     9    17 
## 0.969 0.029 0.002 0.000 0.000 0.000 0.000 0.000 0.000
## 
##            housewares computers_accessories       furniture_decor 
##                  6527                  7275                  7513 
##        sports_leisure         health_beauty        bed_bath_table 
##                  7954                  9103                 10001

1-4.產品類別銷售及地理區域市佔情形

Part2:每年變化趨勢分析

2-1.每月的訂單數,新進的買、賣家數趨勢變化

2-2.泡泡圖

Part3:地區下的銷售/評分情況

3-1.地理區域和評分與銷售額的互動圖

# geolocation: one row per city with its mean latitude/longitude and a state.
# The raw table is read into `geo`, the name the aggregation below uses.
# The original assigned the bare symbol `olist_geolocation_dataset.csv`
# (an undefined object, not a file read) and then edited columns on a data
# frame that was immediately overwritten; those dead statements are removed.
# NOTE(review): the CSV path is assumed to be relative to the working
# directory -- confirm.
geo <- read.csv("olist_geolocation_dataset.csv", stringsAsFactors = FALSE)

geolocation <- geo %>%
  group_by(geolocation_city) %>%
  summarise(
    # average all coordinates recorded for the city
    lat = mean(geolocation_lat),
    lng = mean(geolocation_lng),
    # copy of the city name under the key used for merging with `score`
    seller_city = geolocation_city[1],
    # first state listed for the city
    state = geolocation_state[1]
  )

# Attach coordinates and state to every city's score (join key: seller_city),
# then collapse to one row per state: mean score and summed sales amount.
city_score <- merge(x = score, y = geolocation, by = "seller_city")
state_score <- summarise(
  group_by(city_score, state),
  score = mean(score),
  pricesum = sum(pricesum)
)

# Import the Brazil GeoJSON as an "sp" spatial object
states <- geojsonio::geojson_read(
  x = "C:\\Users\\User\\Desktop\\Group3\\midterm\\Brazil.json",
  what = "sp"
)

# Colour scales: fixed bin edges for the score and for the total sales amount
bins_score <- seq(0, 5, by = 1)
bins_money <- c(0, 500, 5000, 1e4, 5e4, 1e5, 1e6, Inf)
pal_score <- colorBin(
  palette = "Blues",
  domain = state_score$score,
  bins = bins_score
)
pal_money <- colorBin(
  palette = "Reds",
  domain = state_score$pricesum,
  bins = bins_money
)

# HTML hover labels, one per state: state name, score and total price
labels <- lapply(
  sprintf(
    "<strong>%s</strong><br/> score: %g <br/> total price: %g ",
    state_score$state,
    state_score$score,
    state_score$pricesum
  ),
  htmltools::HTML
)

# HTML hover labels, one per city row of city_score
citylabels <- lapply(
  sprintf(
    "<strong>city: %s</strong><br/> score: %g <br/> total price: %g ",
    city_score$seller_city,
    city_score$score,
    city_score$pricesum
  ),
  htmltools::HTML
)

# Base map: leaflet widget bound to the state polygons, with the initial view
# set by the coordinates below and MapBox tiles placed in the "map" group.
# The MapBox token is read from the MAPBOX_ACCESS_TOKEN environment variable.
map_Brazil <- leaflet(data = states) %>%
  setView(lng = -48.74026, lat = -22.074022, zoom = 4) %>%
  addProviderTiles(
    provider = "MapBox",
    group = "map",
    options = providerTileOptions(
      id = "mapbox.light",
      accessToken = Sys.getenv("MAPBOX_ACCESS_TOKEN")
    )
  )

# City layer: MapBox tiles plus one clustered marker per row of city_score,
# all in the "city" group so the layer control can toggle them together.
map_Brazil <- map_Brazil %>%
  addProviderTiles(
    "MapBox",
    group = "city",
    options = providerTileOptions(
      id = "mapbox.light",
      accessToken = Sys.getenv("MAPBOX_ACCESS_TOKEN")
    )
  ) %>%
  addMarkers(
    lng = city_score$lng,
    lat = city_score$lat,
    # hover label built from citylabels (same row order as city_score)
    label = citylabels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 4px"),
      textsize = "15px",
      direction = "auto"
    ),
    clusterOptions = markerClusterOptions(),
    # popupOptions() belongs in `popupOptions`; the original passed it as
    # `options`, which expects markerOptions()
    popupOptions = popupOptions(closeButton = TRUE),
    group = "city"
  )

# State layers: two choropleths over the same polygons (score and total
# price), their legends, a layer control, and a mini map.
# NOTE(review): fill colours and labels are taken from state_score by row
# position; this presumes state_score rows are in the same order as the
# polygons in `states` -- confirm, or join state_score onto the polygon data.
map_Brazil <- map_Brazil %>%
  # state score choropleth
  addPolygons(
    fillColor = ~pal_score(state_score$score),
    weight = 2,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    # spelled out in full; `highlight =` relied on partial argument matching
    highlightOptions = highlightOptions(
      weight = 5,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    ),
    group = "state_score"
  ) %>%
  # state total-price choropleth
  addPolygons(
    fillColor = ~pal_money(state_score$pricesum),
    weight = 2,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    highlightOptions = highlightOptions(
      weight = 5,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    ),
    group = "state_price"
  ) %>%
  # legends, each assigned to the group of its choropleth
  addLegend(
    pal = pal_money,
    values = ~state_score$pricesum,
    opacity = 0.7,
    title = "price legend",
    position = "bottomleft",
    group = "state_price"
  ) %>%
  addLegend(
    pal = pal_score,
    values = ~state_score$score,
    opacity = 0.7,
    title = "score legend",
    position = "bottomleft",
    group = "state_score"
  ) %>%
  # layer control for the three overlay groups
  addLayersControl(
    overlayGroups = c("city", "state_score", "state_price")
  ) %>%
  # start with the price layer hidden
  hideGroup("state_price") %>%
  # mini map; the same Esri tiles are also added to the main map
  addProviderTiles(providers$Esri.WorldStreetMap) %>%
  addMiniMap(
    tiles = providers$Esri.WorldStreetMap,
    toggleDisplay = TRUE
  )

Part4:評分迴歸模型

4-1.對評分的迴歸模型

## 
## Call:
## lm(formula = score ~ ., data = score_lm[, c(2:15)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1415 -0.4159  0.5115  0.7576  8.7138 
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.467e+02  1.667e+01   8.797  < 2e-16 ***
## ship13           -4.935e-02  5.077e-04 -97.207  < 2e-16 ***
## ship12           -5.992e-03  1.248e-03  -4.800 1.59e-06 ***
## ship23                   NA         NA      NA       NA    
## ship_ratio       -1.026e-01  1.367e-02  -7.507 6.11e-14 ***
## price            -1.803e-04  1.989e-05  -9.068  < 2e-16 ***
## AVEvolume         5.964e-07  1.449e-07   4.117 3.85e-05 ***
## description       1.746e-05  5.592e-06   3.122  0.00180 ** 
## photo_num         3.931e-04  2.038e-03   0.193  0.84703    
## pay_installment  -5.247e-03  1.555e-03  -3.374  0.00074 ***
## dist              2.896e-05  1.092e-05   2.653  0.00797 ** 
## customer_stateAL -3.049e-02  1.519e-01  -0.201  0.84094    
## customer_stateAM  4.792e-01  1.722e-01   2.783  0.00539 ** 
## customer_stateAP  4.781e-01  2.030e-01   2.355  0.01853 *  
## customer_stateBA -2.006e-01  1.407e-01  -1.426  0.15385    
## customer_stateCE -8.222e-02  1.431e-01  -0.574  0.56564    
## customer_stateDF -3.121e-01  1.417e-01  -2.203  0.02757 *  
## customer_stateES -2.290e-01  1.419e-01  -1.614  0.10651    
## customer_stateGO -2.120e-01  1.419e-01  -1.493  0.13534    
## customer_stateMA -1.838e-01  1.464e-01  -1.256  0.20923    
## customer_stateMG -3.013e-01  1.398e-01  -2.156  0.03113 *  
## customer_stateMS -1.398e-01  1.465e-01  -0.954  0.33986    
## customer_stateMT -4.708e-02  1.452e-01  -0.324  0.74583    
## customer_statePA -5.394e-03  1.447e-01  -0.037  0.97025    
## customer_statePB -1.621e-02  1.490e-01  -0.109  0.91339    
## customer_statePE -8.902e-02  1.423e-01  -0.626  0.53158    
## customer_statePI -1.089e-01  1.500e-01  -0.726  0.46804    
## customer_statePR -2.555e-01  1.404e-01  -1.819  0.06889 .  
## customer_stateRJ -3.674e-01  1.397e-01  -2.629  0.00856 ** 
## customer_stateRN  2.469e-02  1.502e-01   0.164  0.86945    
## customer_stateRO  7.345e-02  1.600e-01   0.459  0.64622    
## customer_stateRR  3.445e-01  2.407e-01   1.431  0.15239    
## customer_stateRS -1.548e-01  1.402e-01  -1.104  0.26964    
## customer_stateSC -2.168e-01  1.408e-01  -1.540  0.12359    
## customer_stateSE -1.144e-01  1.543e-01  -0.741  0.45855    
## customer_stateSP -3.909e-01  1.396e-01  -2.799  0.00512 ** 
## customer_stateTO -3.208e-02  1.575e-01  -0.204  0.83863    
## seller_stateBA    3.103e-01  7.018e-01   0.442  0.65835    
## seller_stateCE    5.480e-01  7.122e-01   0.769  0.44161    
## seller_stateDF    1.547e-01  7.011e-01   0.221  0.82532    
## seller_stateES    2.156e-01  7.033e-01   0.307  0.75922    
## seller_stateGO    4.409e-01  7.021e-01   0.628  0.53003    
## seller_stateMA    3.084e-01  7.025e-01   0.439  0.66065    
## seller_stateMG    3.030e-01  6.999e-01   0.433  0.66504    
## seller_stateMS    4.792e-01  7.227e-01   0.663  0.50731    
## seller_stateMT    3.493e-01  7.075e-01   0.494  0.62158    
## seller_statePA    6.677e-01  8.204e-01   0.814  0.41572    
## seller_statePB    2.700e-01  7.339e-01   0.368  0.71299    
## seller_statePE    2.939e-01  7.026e-01   0.418  0.67578    
## seller_statePI    4.783e-01  7.894e-01   0.606  0.54454    
## seller_statePR    3.045e-01  6.999e-01   0.435  0.66350    
## seller_stateRJ    2.887e-01  7.001e-01   0.412  0.68003    
## seller_stateRN    1.840e-01  7.205e-01   0.255  0.79847    
## seller_stateRO    4.848e-02  8.568e-01   0.057  0.95487    
## seller_stateRS    3.207e-01  7.003e-01   0.458  0.64697    
## seller_stateSC    3.108e-01  7.001e-01   0.444  0.65708    
## seller_stateSE   -3.641e-01  8.204e-01  -0.444  0.65721    
## seller_stateSP    1.824e-01  6.998e-01   0.261  0.79432    
## time_group       -7.028e-04  8.259e-05  -8.510  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.211 on 93393 degrees of freedom
## Multiple R-squared:  0.1247, Adjusted R-squared:  0.1242 
## F-statistic: 233.4 on 57 and 93393 DF,  p-value: < 2.2e-16

4-2.相關係數

##                       ship13       ship12       ship23  ship_ratio
## ship13           1.000000000  0.386467622  0.928326482  0.07662941
## ship12           0.386467622  1.000000000  0.015887260 -0.02122266
## ship23           0.928326482  0.015887260  1.000000000  0.09162889
## ship_ratio       0.076629410 -0.021222657  0.091628893  1.00000000
## price            0.048197974  0.066818713  0.025318109 -0.27239650
## AVEvolume        0.062146732  0.118137527  0.019754219 -0.08065473
## description      0.005079196  0.005716356  0.003202211 -0.11409592
## photo_num       -0.022481239 -0.035293149 -0.010145917 -0.04701655
## pay_installment  0.074208135  0.057819481  0.057143319 -0.18573847
## dist             0.267048036  0.008095524  0.286245083  0.14047168
## time_group      -0.059746756 -0.080666577 -0.032256348  0.00938864
## score           -0.336192046 -0.154493835 -0.302193607 -0.02709736
##                        price   AVEvolume  description   photo_num
## ship13           0.048197974  0.06214673  0.005079196 -0.02248124
## ship12           0.066818713  0.11813753  0.005716356 -0.03529315
## ship23           0.025318109  0.01975422  0.003202211 -0.01014592
## ship_ratio      -0.272396502 -0.08065473 -0.114095916 -0.04701655
## price            1.000000000  0.30940086  0.267643426  0.11663583
## AVEvolume        0.309400865  1.00000000  0.115844242  0.10944865
## description      0.267643426  0.11584424  1.000000000  0.22384301
## photo_num        0.116635834  0.10944865  0.223843009  1.00000000
## pay_installment  0.316193547  0.17100803  0.074115328  0.03766222
## dist             0.112570130  0.05975062  0.139228914  0.11028820
## time_group      -0.004081408 -0.03937762  0.022193502 -0.01354552
## score           -0.031142695 -0.01878400  0.011170128  0.01252416
##                 pay_installment         dist   time_group        score
## ship13               0.07420813  0.267048036 -0.059746756 -0.336192046
## ship12               0.05781948  0.008095524 -0.080666577 -0.154493835
## ship23               0.05714332  0.286245083 -0.032256348 -0.302193607
## ship_ratio          -0.18573847  0.140471682  0.009388640 -0.027097359
## price                0.31619355  0.112570130 -0.004081408 -0.031142695
## AVEvolume            0.17100803  0.059750624 -0.039377624 -0.018784003
## description          0.07411533  0.139228914  0.022193502  0.011170128
## photo_num            0.03766222  0.110288200 -0.013545517  0.012524155
## pay_installment      1.00000000  0.087421435 -0.060023291 -0.030452988
## dist                 0.08742144  1.000000000 -0.035967577 -0.038081568
## time_group          -0.06002329 -0.035967577  1.000000000 -0.007807136
## score               -0.03045299 -0.038081568 -0.007807136  1.000000000